1 Air Pollution in Małopolska

1.1 Loading Data

1.1.1 Loading libraries

library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(tmap)
library(tmaptools)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## here() starts at /Users/Szymon1/Documents/CASA/Modules/GIS/repo/GIS_code/Assigment
library(MazamaSpatialUtils)
## Loading required package: sp
## 
## Attaching package: 'MazamaSpatialUtils'
## The following object is masked from 'package:purrr':
## 
##     simplify
library(MazamaCoreUtils)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(sp)
library(PWFSLSmoke)
library(con2aqi)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(ggplot2)
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:PWFSLSmoke':
## 
##     distance
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:tidyr':
## 
##     extract
library(janitor)
## 
## Attaching package: 'janitor'
## The following object is masked from 'package:raster':
## 
##     crosstab
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(gstat)
library(con2aqi)
library(broom)
library(dplyr)
library(elevatr)
library(spdep)
## Loading required package: spData
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift

1.1.2 Loading spatial data

#spatial
gminy <- st_read(here::here("ass_data", 
                                  "geo", 'jednostki_administracyjne',
                                  "Gminy.shp"))
## Reading layer `Gminy' from data source `/Users/Szymon1/Documents/CASA/Modules/GIS/repo/GIS_code/Assigment/ass_data/geo/jednostki_administracyjne/Gminy.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2477 features and 29 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 14.12288 ymin: 49.00205 xmax: 24.14578 ymax: 54.83642
## geographic CRS: ETRS89
wojewodztwa<- st_read(here::here("ass_data", 
                                   "geo", 'jednostki_administracyjne',
                                   "Wojewodztwa.shp"))
## Reading layer `Wojewodztwa' from data source `/Users/Szymon1/Documents/CASA/Modules/GIS/repo/GIS_code/Assigment/ass_data/geo/jednostki_administracyjne/Wojewodztwa.shp' using driver `ESRI Shapefile'
## Simple feature collection with 16 features and 29 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 14.12288 ymin: 49.00205 xmax: 24.14578 ymax: 54.83642
## geographic CRS: ETRS89
powiaty<- st_read(here::here("ass_data", 
                                 "geo", 'jednostki_administracyjne',
                                 "Powiaty.shp"))
## Reading layer `Powiaty' from data source `/Users/Szymon1/Documents/CASA/Modules/GIS/repo/GIS_code/Assigment/ass_data/geo/jednostki_administracyjne/Powiaty.shp' using driver `ESRI Shapefile'
## Simple feature collection with 380 features and 29 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 14.12288 ymin: 49.00205 xmax: 24.14578 ymax: 54.83642
## geographic CRS: ETRS89
country<- st_read(here::here("ass_data", 
                             "geo", 'jednostki_administracyjne',
                             "Panstwo.shp"))
## Reading layer `Panstwo' from data source `/Users/Szymon1/Documents/CASA/Modules/GIS/repo/GIS_code/Assigment/ass_data/geo/jednostki_administracyjne/Panstwo.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 29 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 14.06964 ymin: 49.00205 xmax: 24.14578 ymax: 55.03605
## geographic CRS: ETRS89
lesserpoland <- wojewodztwa %>% 
  filter(JPT_NAZWA_=='małopolskie')%>% 
  st_transform(., 2180)

powiaty_LP <- powiaty %>% 
  filter(str_detect(JPT_KOD_JE,'^12'))%>% 
  st_transform(., 2180)

gminy_LP <- gminy %>% 
  filter(str_detect(JPT_KOD_JE,'^12'))%>% 
  st_transform(., 2180)

gminy_codes <- gminy_LP %>% 
  dplyr::select(.,"JPT_KOD_JE","JPT_NAZWA_") %>% 
  st_drop_geometry() %>% 
  mutate(JPT_KOD_JE = as.numeric(JPT_KOD_JE))

gminy_LP_grouped<-gminy_LP %>% 
  group_by(JPT_KOD_JE) %>% 
  group_split()

1.2 Data collection

### Pollution

Stations <- read_csv(here::here("ass_data", 
                                "csv", 
                                "Stacje.csv"))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Nr = col_double(),
##   `Kod stacji` = col_character(),
##   `Kod międzynarodowy` = col_character(),
##   `Nazwa stacji` = col_character(),
##   `Stary Kod stacji` = col_character(),
##   `Data uruchomienia` = col_character(),
##   `Data zamknięcia` = col_character(),
##   `Typ stacji` = col_character(),
##   `Typ obszaru` = col_character(),
##   `Rodzaj stacji` = col_character(),
##   Województwo = col_character(),
##   Miejscowość = col_character(),
##   Ulica = col_character(),
##   `WGS84 φ N` = col_double(),
##   `WGS84 λ E` = col_double()
## )
# Following piece of code is commented out because running it might take up to 1h 
# The output of this section of the code is the data frame called "pollution_yearly"
# which consist annual reading of the eight pollutants ("pm10","pm25","so2","o3","nox","no2","co","c6h6")
# from the period of 2000-2019 for all the 182 subregions in malopolska

#### Loading all pollutants 2000-2019 data

pollutants <- c("pm10","pm25","so2","o3","nox","no2","co","c6h6")

pollutants_csv <- list()

for(i in pollutants){
  #loading csv for whole country and slicing first row
  filepath <- file.path(here::here("ass_data","csv"),paste(toupper(i),".csv",sep=""))
  assign(sprintf("%s_poland",i), slice(read_csv(filepath),-1))
  pollutants_csv[[match(i,pollutants)]] <- sprintf("%s_poland",i)
}
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Rok = col_character(),
##   Województwo = col_character(),
##   `Kod strefy` = col_character(),
##   `Nazwa strefy` = col_character(),
##   `Kod stacji` = col_character(),
##   Wskaźnik = col_character(),
##   `Czas uśredniania` = col_character(),
##   Średnia = col_character(),
##   Min = col_character(),
##   Maks = col_character(),
##   `L>50 (S24)` = col_character(),
##   `36 maks. (S24)` = col_character(),
##   `Perc. 90.4 (S24)` = col_character(),
##   `Maks (S24)` = col_character(),
##   `Liczba pomiarów` = col_character(),
##   Kompletność = col_character(),
##   `Liczba Lato/Zima` = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Rok = col_character(),
##   Województwo = col_character(),
##   `Kod strefy` = col_character(),
##   `Nazwa strefy` = col_character(),
##   `Kod stacji` = col_character(),
##   Wskaźnik = col_character(),
##   `Czas uśredniania` = col_character(),
##   Średnia = col_character(),
##   Min = col_character(),
##   Maks = col_character(),
##   `Liczba pomiarów` = col_character(),
##   Kompletność = col_character(),
##   `Liczba Lato/Zima` = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Rok = col_character(),
##   Województwo = col_character(),
##   `Kod strefy` = col_character(),
##   `Nazwa strefy` = col_character(),
##   `Kod stacji` = col_character(),
##   Wskaźnik = col_character(),
##   `Czas uśredniania` = col_character(),
##   Średnia = col_character(),
##   Min = col_character(),
##   Maks = col_character(),
##   `Liczba pomiarów` = col_character(),
##   Kompletność = col_character(),
##   `Liczba Lato/Zima` = col_character()
## )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Rok = col_character(),
##   Województwo = col_character(),
##   `Kod strefy` = col_character(),
##   `Nazwa strefy` = col_character(),
##   `Kod stacji` = col_character(),
##   Wskaźnik = col_character(),
##   `Czas uśredniania` = col_character(),
##   Średnia = col_character(),
##   Min = col_character(),
##   Maks = col_character(),
##   `L>200 (S1)` = col_character(),
##   `19 maks. (S1)` = col_character(),
##   `Perc. 99.8 (S1)` = col_character(),
##   `Liczba pomiarów` = col_character(),
##   Kompletność = col_character(),
##   `Liczba Lato/Zima` = col_character()
## )
## Warning: Missing column names filled in: 'X15' [15], 'X16' [16], 'X17' [17],
## 'X18' [18], 'X19' [19], 'X20' [20]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   X15 = col_logical(),
##   X16 = col_logical(),
##   X17 = col_logical(),
##   X18 = col_logical(),
##   X19 = col_logical(),
##   X20 = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Rok = col_character(),
##   Województwo = col_character(),
##   `Kod strefy` = col_character(),
##   `Nazwa strefy` = col_character(),
##   `Kod stacji` = col_character(),
##   Wskaźnik = col_character(),
##   `Czas uśredniania` = col_character(),
##   Średnia = col_character(),
##   Min = col_character(),
##   Maks = col_character(),
##   `Liczba pomiarów` = col_character(),
##   Kompletność = col_character(),
##   `Liczba Lato/Zima` = col_character()
## )
# ##### Joining pollution readings with the station data and converting it to SF
# 
# for (i in pollutants_csv) {
#   # joining with stations dataframe by station code
#   filename <- eval(parse(text = i))
#   assign(sprintf("%s_sf",i), left_join(filename,Stations,
#                                        by = c("Kod stacji" = "Kod stacji"))) 
#   # droping na from "WGS84 φ N" column
#   filename_2 <- eval(parse(text = sprintf("%s_sf",i)))
#   assign(sprintf("%s_sf",i), drop_na(filename_2,"WGS84 φ N") ) 
#   # converting to SF
#   filename_2 <- eval(parse(text = sprintf("%s_sf",i)))
#   assign(sprintf("%s_sf",i), st_as_sf(filename_2, coords = c("WGS84 λ E","WGS84 φ N"), crs = 4326)) 
# }
# 
# #####  putting all pollutants SFs to a list
# pollutants_sf<- list(pm10_poland_sf,pm25_poland_sf,co_poland_sf,no2_poland_sf,
#                      nox_poland_sf,so2_poland_sf,c6h6_poland_sf,o3_poland_sf)
# #### Interpolation
# 
# ##### SP outline od Malopolska
# ls_border <- lesserpoland %>%
#   st_transform(., 2180) %>% 
#   as(., 'Spatial')
# 
# ##### SF -> SP yearly measurments  - All pollutants
# pollutants_yearly <- lapply(pollutants_sf,function(x) group_split(x,Rok) ) 
# 
# pollutants_yearly_sp <- lapply(pollutants_yearly,
#                                function(x) lapply(x,
#                                                   function(y) as(st_transform(y, 2180), 'Spatial')))
# 
# ##### creating a grind
# emptygrd <- as.data.frame(spsample(ls_border, n=1000, type="regular", cellsize=1000))
# 
# names(emptygrd) <- c("X", "Y")
# 
# coordinates(emptygrd) <- c("X", "Y")
# 
# gridded(emptygrd) <- TRUE  # Create SpatialPixel object
# fullgrid(emptygrd) <- TRUE  # Create SpatialGrid object
# 
# ##### creating raster with yearly measurments from 2000-2019 for all pollutants
# 
# pollutants_yearly_ras<- list()
# 
# for (p in pollutants_yearly_sp) {
#   # Add the projection to the grid
#   proj4string(emptygrd)<- proj4string(p[[1]])
#   # Interpolate the grid cells using a power value of 2 
#   assign("interpolate_list", lapply(p,function(i) gstat::idw(Średnia ~ 1, i, newdata=emptygrd, idp=2.0)))
#   # Convert output to raster object 
#   assign("ras", lapply(interpolate_list,function(i) raster(i)))
#   # Clip the raster to Lesserpoland outline
#   assign("ras_masked" ,lapply(ras,function(i) raster::mask(i, ls_border)))
#   # add to the pollutants_yearly_ras
#   pollutants_yearly_ras<-append(pollutants_yearly_ras,list(ras_masked))
# } 
# 
# ##### Plot the raster - test
# # plot(pollutants_yearly_ras[[8]][[20]])
# 
# #####  extracting values FOR EVERY REGION from a raster and adding it to dataframe 
# pollutants_yearly_mean<- list()
# 
# for (p in pollutants_yearly_ras) {
#   assign("pollutants_yearly_gminy",  lapply(p, function(y) 
#     lapply(gminy_LP_grouped, function(g)       
#       raster::extract(y,g,small=TRUE))))
#   pollutants_yearly_mean<-append(pollutants_yearly_mean,list(pollutants_yearly_gminy))
# } 
# 
# ##### calculation annual mean for every region
# pollutants_yearly_gminy_mean<-  pollutants_yearly_mean %>% 
#   lapply(., function(p)
#     lapply(p,function(y) 
#       lapply(y,function(g) 
#         mean(as.numeric(as.character(unlist(g))))
#       )))
# 
# #####transfering to data frame
# 
# gminy_codes <- gminy_LP %>% 
#   dplyr::select(.,"JPT_KOD_JE","JPT_NAZWA_") %>% 
#   st_drop_geometry() %>% 
#   mutate(JPT_KOD_JE = as.numeric(JPT_KOD_JE)) %>% 
#   arrange(.,JPT_KOD_JE)
# 
# gminy_codes_v<- as.vector(gminy_codes$JPT_KOD_JE)
# 
# ##### naming list elements + convert to the final data frame
# df <- pollutants_yearly_gminy_mean %>% 
#   lapply(., function(p) 
#     lapply(p,function(y) 
#       as.data.frame(y))) %>% 
#   lapply(., function(p) 
#     lapply(p, function(y) y %>% 
#              rename_at(vars(colnames(y)), function(x) gminy_codes_v) %>% 
#              pivot_longer(everything())))
# 
# names(df) <- c("pm10","pm25","co","no2","nox","so2","c6h6","o3")
# 
# for (i in c(1,4,5,6,8)) {
#   names(df[[i]]) <- as.character(unlist(seq(2000,2019,by=1)))
#   names(df[[2]]) <- as.character(unlist(seq(2002,2019,by=1)))
#   names(df[[7]]) <- as.character(unlist(seq(2001,2019,by=1)))
#   names(df[[3]]) <- as.character(unlist(seq(2003,2019,by=1)))
# }
# 
# 
# df_pollutants <- df %>% 
#   lapply(., function(x) x %>% reduce(left_join, by = "name")) 
# 
# for (i in c(1,4,5,6,8)) {
#   names(df_pollutants[[i]]) <- c("area_code",as.character(unlist(seq(2000,2019,by=1))))
#   names(df_pollutants[[2]]) <- c("area_code",as.character(unlist(seq(2002,2019,by=1))))
#   names(df_pollutants[[7]]) <- c("area_code",as.character(unlist(seq(2001,2019,by=1))))
#   names(df_pollutants[[3]]) <- c("area_code",as.character(unlist(seq(2003,2019,by=1))))
# }
# 
# df_pollutants <- df_pollutants %>% 
#   reduce(left_join, by = "area_code",all.x = TRUE,all.y = TRUE) 
# 
# 
# df_pollutants <- df_pollutants%>% 
#   rename_at(.vars = vars(seq(136,155,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_o3", .))) %>% 
#   rename_at(.vars = vars(seq(117,135,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_c6h6", .))) %>% 
#   rename_at(.vars = vars(seq(97,116,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_so2", .)))%>% 
#   rename_at(.vars = vars(seq(77,96,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_nox", .))) %>% 
#   rename_at(.vars = vars(seq(57,76,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_no2", .)))%>% 
#   rename_at(.vars = vars(seq(40,56,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_co", .))) %>% 
#   rename_at(.vars = vars(seq(22,39,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_pm25", .)))%>% 
#   rename_at(.vars = vars(seq(2,21,1)),
#             .funs = funs(sub("[.x]*$|[.y]*$", "_pm10", .))) 
# ##### saving to csv 
# write.csv(df_pollutants,here::here("ass_data",
#                              "csv",
#                              "pollution_yearly.csv"), row.names = FALSE)

#### Loading generated csv file with pollutans 
pollutants <- read_csv(here::here("ass_data", 
                                  "csv", 
                                  "pollution_yearly.csv")) 
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
#### calculating mean reading of the pollutants for the whole region of Malopolska 
pollutants_ls_mean<- pollutants%>% 
  summarise_all( mean) %>% 
  pivot_longer(-area_code,
               names_to = "year_pollutant",
               values_to = "value",
  ) %>% 
  dplyr::select(-area_code) %>% 
  mutate(year= as.factor(str_extract(year_pollutant, "^\\d+")),
         pollutant= as.character(str_extract(year_pollutant, "(?<=_)(.*)")))

#### calculating difference of the annual PM25 reading before 2008 and after  
pm25 <- pollutants%>% 
  dplyr::select(c(1,22:39))%>%
  mutate(across(.cols = 2:19, as.numeric)) %>% 
  mutate(pm25_08_18= round(rowMeans(.[c(9:18)], na.rm = TRUE),1)) %>%
  mutate(pm25_02_08= round(rowMeans(.[c(2:8)], na.rm = TRUE),1)) %>% 
  mutate(pm25= pm25_08_18) %>% 
  mutate(pm25_dif= pm25_08_18-pm25_02_08) %>% 
  dplyr::select(area_code,pm25,pm25_08_18,pm25_02_08,pm25_dif)

### Density of population

density <- read_csv(here::here("ass_data", 
                               "csv", 
                               "density.csv")) %>% 
  slice(.,-2) %>% 
  row_to_names(row_number = 1) %>% 
  dplyr::rename(.,area_code=1,area_name=2)  %>% 
  right_join(.,gminy_codes, by=c("area_code"="JPT_KOD_JE"))
## Warning: Missing column names filled in: 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7],
## 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14],
## 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18], 'X19' [19], 'X20' [20]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Kod = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning in row_to_names(., row_number = 1): Row 1 does not provide unique names.
## Consider running clean_names() after row_to_names().
#### calculating difference between 02-08 and 08-19
density_08_19 <- density %>% 
  dplyr::select(c(1,3:20))%>%
  mutate(across(.cols = 2:19, as.numeric)) %>%
  mutate(density_km2_08_19= round(rowMeans(.[c(9:19)], na.rm = TRUE),0)) %>%
  mutate(density_km2= density_km2_08_19) %>% 
  dplyr::select(area_code,density_km2)
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 2:19, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
### population
pop <- read_csv(here::here("ass_data", 
                           "csv", 
                           "ludnosc.csv")) %>% 
  slice(.,c(-1,-2,-3,-5)) %>% 
  row_to_names(row_number = 1) %>% 
  dplyr::rename(.,area_code=1,area_name=2) %>% 
  mutate(across(.cols = 3:22, as.numeric))%>% 
  mutate(population_00_08 = round(rowMeans(subset(., select = as.numeric(c(3:11))), na.rm = TRUE),0)) %>% 
  mutate(population_08_19 = round(rowMeans(subset(., select = as.numeric(c(12:22))), na.rm = TRUE),0)) %>% 
  mutate(population_dif = population_08_19-population_00_08 )
## Warning: Missing column names filled in: 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7],
## 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14],
## 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18], 'X19' [19], 'X20' [20],
## 'X21' [21], 'X22' [22]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Kod = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning in row_to_names(., row_number = 1): Row 1 does not provide unique names.
## Consider running clean_names() after row_to_names().
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
pop_dif <- pop %>% 
  dplyr::select(area_code,population_dif) %>% 
  mutate(population_dif = replace_na(population_dif, 0))

pop_08_19 <- pop %>% 
  dplyr::select(area_code,population_08_19)


### Furnaces removed in malopolska during 2008-2019

furnaces <- read_csv(here::here("ass_data", 
                                "csv", 
                                "furnaces_final.csv")) %>% 
  dplyr::select(area_code,area_name,podregion,powiat_name,
                typ_gminy,'2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018') %>% 
  drop_na(area_code)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   area_code = col_double(),
##   area_name = col_character(),
##   podregion = col_character(),
##   powiat_name = col_character(),
##   typ_gminy = col_character(),
##   `2008` = col_double(),
##   `2009` = col_double(),
##   `2010` = col_double(),
##   `2011` = col_double(),
##   `2012` = col_double(),
##   `2013` = col_double(),
##   `2014` = col_double(),
##   `2015` = col_double(),
##   `2016` = col_double(),
##   `2017` = col_double(),
##   `2018` = col_double()
## )
furnaces_final <- furnaces %>% 
  mutate(type_n=case_when(str_detect(typ_gminy, "City|Urban$") ~ "1",
                          str_detect(typ_gminy, "Urban-rural") ~ "3",
                          str_detect(typ_gminy, "Rural") ~ "2")) %>% 
  mutate(area_code=paste(area_code, type_n,sep="")) %>% 
  mutate(all_removed= rowSums(.[sapply(.,is.numeric)], na.rm = TRUE)) %>%
  mutate(mean_removed= round(rowMeans(.[sapply(.,is.numeric)], na.rm = TRUE),0)) %>%
  mutate(area_code=as.numeric(area_code)) %>% 
  left_join(.,pop_08_19,by="area_code") %>% 
  mutate(furnaces_per_1000= round(all_removed/(population_08_19/1000),0))

total_fur <- sum(furnaces_final$all_removed)

furnaces_08_19 <- furnaces_final %>% 
  dplyr::select(area_code,furnaces_per_1000)

gminy_LP_fur <- gminy_LP %>%
  mutate(JPT_KOD_JE=as.numeric(JPT_KOD_JE)) %>%
  left_join(.,furnaces_final,
            by = c("JPT_KOD_JE"="area_code"))




### roads length in subregions

# following piece of code is commented out because running it might take up to 20min
# The output of this section of the code is the data frame called "roads_length"
# which consist of total road length in metres for all the 182 subregions in malopolska
# calculated using OSM data

# roads <- st_read(here::here("ass_data", 
#                             "geo", 'malopolskie-latest-free',
#                             "gis_osm_roads_free_1.shp"))%>% 
#   st_transform(., 2180)
# 
# roads_length <- lapply(gminy_LP_grouped,function(x) sum(st_length(sf::st_intersection(roads,x))))
# 
# names<- gminy_codes%>% 
#   arrange(., JPT_KOD_JE) %>%
#   dplyr::select(JPT_KOD_JE) %>% 
#   pull()
# 
# 
# roads_df<-as.data.frame(roads_length) 
# colnames(roads_df)<- names
# 
# roads_df<- roads_df %>% 
#   pivot_longer(everything(), 
#                names_to="area_code", 
#                values_to="roads_length_m") %>% 
#   mutate(roads_length_m = as.numeric(gsub("[m]$", "", roads_length_m))) %>%
#   mutate(area_code=as.numeric(area_code))%>% 
#   left_join(.,gminy_codes,by=c("area_code"="JPT_KOD_JE"))
# 
# write.csv(roads_df,here::here("ass_data", 
#                               "csv", 
#                               "roads_length.csv"), row.names = FALSE)


roads <- read_csv(here::here("ass_data", 
                             "csv",
                             "roads_length.csv")) %>% 
  mutate(roads_km=round(roads_lenght_m/1000,3))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   area_code = col_double(),
##   roads_lenght_m = col_double(),
##   JPT_NAZWA_ = col_character()
## )
### forrest area 

forrest <- read_csv(here::here("ass_data", 
                             "csv",
                             "forrest_area_prc.csv")) %>% 
  slice(.,c(-2))%>% 
  row_to_names(row_number = 1) %>% 
  dplyr::rename(.,area_code=1,area_name=2) %>% 
  mutate(across(.cols = 3:22, as.numeric)) %>% 
  left_join(gminy_codes,.,by=c("JPT_KOD_JE"="area_code")) %>% 
  mutate(forrest_08_19 = round(rowMeans(subset(., select = as.numeric(c(13:23))), na.rm = TRUE),0)) %>% 
  mutate(forrest = forrest_08_19) %>% 
  dplyr::select(JPT_KOD_JE,forrest)
## Warning: Missing column names filled in: 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7],
## 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14],
## 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18], 'X19' [19], 'X20' [20],
## 'X21' [21], 'X22' [22]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Kod = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning in row_to_names(., row_number = 1): Row 1 does not provide unique names.
## Consider running clean_names() after row_to_names().
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:22, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
### land use data 

# following piece of code is commented out because running it might take up to 20min
# The output of this section of the code is the data frame called "urban_area"
# which consist of percentages of urban development in the subregion for all the 182 subregions in malopolska
# calculated using OSM data

# land <- st_read(here::here("ass_data", 
#                            "geo", 'malopolskie-latest-free',
#                            "gis_osm_landuse_a_free_1.shp"))%>% 
#   st_transform(., 2180)
# 
# urban<-land %>% 
#   dplyr::filter(fclass=="industrial"|fclass=="retail"|fclass=="residential"|fclass=="commercial"|fclass=="heath")
# 
# urban_area <- lapply(gminy_LP_grouped,function(x) sum(st_area(sf::st_intersection(urban,x))))
# total_area <- lapply(gminy_LP_grouped,function(x) st_area(x))
# 
# 
# urban_df<-as.data.frame(urban_area) 
# total_df<- as.data.frame(total_area) 
# 
# colnames(urban_df)<- names
# colnames(total_df)<- names
# 
# urban_df<- urban_df %>% 
#   pivot_longer(everything(), 
#                names_to="area_code", 
#                values_to="urban_area_m2") %>% left_join(.,
#                                                         pivot_longer(total_df,everything(), 
#                                                                      names_to="area_code", 
#                                                                      values_to="total_area_m2"),
#                                                         by="area_code") %>% 
#   mutate(total_area_m2=as.numeric(total_area_m2),
#          urban_area_m2=as.numeric(urban_area_m2),
#          area_code=as.numeric(area_code))%>% 
#   left_join(.,
#             gminy_codes,
#             by=c("area_code"="JPT_KOD_JE")) %>% 
#   mutate(urban_landuse = urban_area_m2/total_area_m2*100) %>% 
#   mutate_at(vars(-area_code,-JPT_NAZWA_), funs(round(., 1)))
# 
# write.csv(urban_df,here::here("ass_data", 
#                               "csv", 
#                               "urban_area.csv"), row.names = FALSE)

urban<-read_csv(here::here("ass_data", 
                           "csv", 
                           "urban_area.csv")) %>% 
  dplyr::select(area_code,urban_landuse,total_area_m2)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   area_code = col_double(),
##   urban_area_m2 = col_double(),
##   total_area_m2 = col_double(),
##   JPT_NAZWA_ = col_character(),
##   urban_landuse = col_double()
## )
### altitude data

# following piece of code is commented out because running it might take up to 20min
# The output of this section of the code is the data frame called "elevation."
# which consist of average altitude for all the 182 subregions in malopolska
# calculated using elevatr package

# ls_border <- lesserpoland %>%
#   st_transform(., 2180) %>% 
#   as(., 'Spatial')
# 
# #### creating the dataframe with SP bbox
# loc_df <- data.frame(x = runif(6,min=sp::bbox(buffer(ls_border,10000))[1,1], 
#                                max=sp::bbox(buffer(ls_border,10000))[1,2]),
#                      y = runif(6,min=sp::bbox(buffer(ls_border,10000))[2,1], 
#                                max=sp::bbox(buffer(ls_border,10000))[2,2]))
# 
# #### getting the altitude data using the elevatr package
# x <- get_elev_raster(locations = loc_df, prj = sp::proj4string(ls_border), z=10)
# 
# elev_mask<-mask(x,ls_border)
# 
# writeRaster(elev_mask, filename=here::here("ass_data", 
#                                            "csv", 
#                                            "ls_elevation.tif"), format="GTiff", overwrite=TRUE)
# 
# #### extracting mean altitude from a raster and adding it to dataframe
# 
# elev_gminy <- lapply(gminy_LP_grouped,function(x) raster::extract(elev_mask,x,small=TRUE)) 
# elev_gminy_mean <- elev_gminy %>% 
#   lapply(.,function(x) as.numeric(as.character(unlist(x)))) %>% 
#   lapply(.,function(x) mean(x,na.rm = TRUE))
# 
# plot(mask(elev_mask,gminy_LP_grouped[[180]]))
# 
# elev_df<-as.data.frame(elev_gminy_mean) 
# 
# colnames(elev_df)<- names
# 
# elev_df<- elev_df %>% 
#   pivot_longer(everything(), 
#                names_to="area_code", 
#                values_to="elevation_m") %>%
#   mutate(area_code=as.numeric(area_code))%>% 
#   left_join(.,
#             gminy_codes,
#             by=c("area_code"="JPT_KOD_JE")) %>% 
#   
#   write.csv(elev_df,here::here("ass_data", 
#                                "csv", 
#                                "elevation.csv"), row.names = FALSE)

#### loading the altitude data
altitude<-read_csv(here::here("ass_data", 
                              "csv", 
                              "elevation.csv")) %>% 
  rename(altitude_m_masl=elevation_m)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   area_code = col_double(),
##   elevation_m = col_double(),
##   JPT_NAZWA_ = col_character()
## )
#### gminy budget
budget<-read_csv(here::here("ass_data", 
                              "csv", 
                              "gminy_budget.csv")) %>% 
  slice(.,c(-1,-3))%>% 
  row_to_names(row_number = 1) %>% 
  dplyr::rename(.,area_code=1,area_name=2) %>% 
  mutate(across(.cols = 3:21, as.numeric)) %>% 
  left_join(gminy_codes,.,by=c("JPT_KOD_JE"="area_code")) %>% 
  mutate(budget_08_19 = round(rowMeans(subset(., select = as.numeric(c(11:22))), na.rm = TRUE),0)) %>% 
  mutate(budget = budget_08_19) %>% 
  dplyr::select(JPT_KOD_JE,budget)
## Warning: Missing column names filled in: 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7],
## 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14],
## 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18], 'X19' [19], 'X20' [20],
## 'X21' [21]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Kod = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning in row_to_names(., row_number = 1): Row 1 does not provide unique names.
## Consider running clean_names() after row_to_names().
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:21, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
#### gminy flats
flats<-read_csv(here::here("ass_data", 
                            "csv", 
                            "flats.csv")) %>% 
  slice(.,c(-2))%>% 
  row_to_names(row_number = 1) %>% 
  dplyr::rename(.,area_code=1,area_name=2) %>% 
  mutate(across(.cols = 3:20, as.numeric)) %>% 
  left_join(gminy_codes,.,by=c("JPT_KOD_JE"="area_code")) %>% 
  mutate(flats_08_19 = round(rowMeans(subset(., select = as.numeric(c(10:21))), na.rm = TRUE),1)) %>% 
  mutate(flats = flats_08_19) %>% 
  dplyr::select(JPT_KOD_JE,flats)
## Warning: Missing column names filled in: 'X4' [4], 'X5' [5], 'X6' [6], 'X7' [7],
## 'X8' [8], 'X9' [9], 'X10' [10], 'X11' [11], 'X12' [12], 'X13' [13], 'X14' [14],
## 'X15' [15], 'X16' [16], 'X17' [17], 'X18' [18], 'X19' [19], 'X20' [20]
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   Kod = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
## Warning in row_to_names(., row_number = 1): Row 1 does not provide unique names.
## Consider running clean_names() after row_to_names().
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion
## Warning: Problem with `mutate()` input `..1`.
## ℹ NAs introduced by coercion
## ℹ Input `..1` is `across(.cols = 3:20, as.numeric)`.
## Warning in fn(col, ...): NAs introduced by coercion

1.3 Descriptive statistics

### assembling the final data frame used for the analysis 

df <- gminy_LP %>% 
  dplyr::select(JPT_KOD_JE,JPT_NAZWA_) %>% 
  rename(area_code=JPT_KOD_JE,
         area_name=JPT_NAZWA_) %>% 
  mutate(area_code= as.numeric(area_code)) %>% 
  left_join(.,density_08_19, by="area_code") %>% 
  left_join(.,furnaces_08_19,by="area_code") %>% 
  left_join(.,pm25, by="area_code") %>% 
  left_join(.,pop_08_19, by="area_code") %>% 
  left_join(.,roads[c("area_code","roads_km")], by="area_code") %>% 
  left_join(.,urban, by="area_code") %>% 
  left_join(.,altitude,by="area_code") %>% 
  left_join(.,forrest, by=c("area_code"="JPT_KOD_JE")) %>% 
  left_join(.,budget, by=c("area_code"="JPT_KOD_JE")) %>% 
  left_join(.,flats, by=c("area_code"="JPT_KOD_JE")) %>% 
  mutate(roads_density_km2= roads_km/(total_area_m2/1000000)) 

write<- df %>% 
  st_drop_geometry() %>% 
  write.csv(.,here::here("ass_data", 
  "csv", 
  "df.csv"), row.names = FALSE)

df_tidy<- df %>% 
  st_drop_geometry() %>% 
  dplyr::select(is.numeric,-area_code)%>%
  pivot_longer(everything(),
               names_to="All_variables", 
               values_to="val")%>%
  mutate(All_variables = tolower(All_variables))
## Warning: Predicate functions must be wrapped in `where()`.
## 
##   # Bad
##   data %>% select(is.numeric)
## 
##   # Good
##   data %>% select(where(is.numeric))
## 
## ℹ Please update your code.
## This message is displayed once per session.
### plotting annual polluntion trends in the region of lesserpoland 
trend_ls <- pollutants_ls_mean %>% 
  dplyr::filter(pollutant=="pm25") %>% 
  ggplot(., aes(x=year,y=value)) + 
  geom_line(aes(group=pollutant),size=.25)+
  geom_point(size=2,colour="red")+ geom_vline(aes(xintercept="2008"),linetype = 2,size=.5)+
  geom_text(aes(x=7.25, label="MAQP", y=21), colour="black", angle=90,size=3 )+
  labs(title=expression("Annual conentrantion of PM"[2.5]*" in Małopolska"),subtitle = "",
      x ="Years", y = expression("Annual PM"[2.5]* " Concentration [μg/m"^3*"]" ))+
  theme_minimal()
trend_ls

#ggsave("trend_ls.png", trend_ls, width=9, height=4,dpi=600)

### plotting study area map
ls_stations<- Stations %>% 
  left_join(pm25_poland,.,by = c("Kod stacji" = "Kod stacji")) %>% 
  drop_na(.,"WGS84 φ N") %>% 
  st_as_sf(.,coords = c("WGS84 λ E","WGS84 φ N"), crs = 4326) %>%
  st_transform(., 2180) %>% 
  st_intersection(.,lesserpoland)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
ls <- gminy_LP %>% 
  mutate(area_code=as.numeric(JPT_KOD_JE)) %>% 
  left_join(.,density_08_19,by="area_code")

cities <- gminy_LP %>% 
  filter(str_detect(JPT_KOD_JE,'^126')) 
cities$JPT_NAZWA_[cities$JPT_NAZWA_ == "Nowy Sącz"] <- "Nowy Sacz"

tmap_mode("plot")
## tmap mode set to plotting
ls_bb<- st_bbox(lesserpoland,
                crs = st_crs(lesserpoland)) %>% 
  st_as_sfc()
main <- tm_shape(ls, bbbox = ls_bb) + 
  tm_polygons("density_km2",alpha=.75, palette="PuBu",style="fisher", title = 'Population Density\npeople per square km')+
  tm_compass(type = "arrow", position = c("right", "bottom")) +
  tm_shape(ls_stations)+ tm_dots(col="yellow",size = 0.05)+
  tm_layout(legend.position = c("left","bottom"), 
            legend.text.size=0.6, 
            legend.title.size = 0.75,
            legend.title.fontface = 1,
            legend.height = 0.9,
            frame=FALSE)+
  tm_shape(cities)+ tm_borders(col = "black",lwd = 1.5)+
  tm_symbols(col = "red", scale = .5)+
  tm_text("JPT_NAZWA_", xmod=0, ymod=-1,size=0.7,fontface="bold",bg.color = "white")+
  tm_layout(inner.margin=c(0.02,0.02,0.02,0.2))+
  tm_credits("(a)", position=c(0,0.85), size=1.5)+
  tm_add_legend('symbol',col="yellow",size = 0.35,
                border.col = "black",
                labels = "The air quality monitoring stations")
  

  
inset = tm_shape(wojewodztwa) + tm_fill(col="white")+ tm_borders(col = "black")+
  tm_shape(lesserpoland) + tm_fill(col="lightblue")+
  tm_borders(col = "black", lwd = 1)+
  tm_layout(frame=FALSE,
            bg.color = "transparent")+
  tm_credits("(b)", position=c(0,0.85), size=1.5)
  
#tmap_save(main,insets_tm = inset,insets_vp=viewport(0.86, 0.7, width = 0.35, height = 0.35), filename="test.png", dpi=300)

### histogram dependent variable PM25

pm25_hist <- df_tidy%>%
  filter(All_variables=="pm25")%>%
  ggplot(., aes(x=val)) + 
  geom_histogram(aes(x = val, y = ..density..),color="black", fill="white")+
  geom_density(colour="red", size=1, adjust=1)
pm25_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### Dependent variable descriptive statistics
summary(df$pm25)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   28.30   31.40   32.80   32.55   33.60   37.70
### histogram independent variable 
independent_hist <- df_tidy%>%
  filter(All_variables!="pm25")%>%
  ggplot(., aes(x=val)) + 
  geom_histogram(aes(x = val, y = ..density..),color="black", fill="white")+
  geom_density(colour="red", size=1, adjust=1)+
  facet_wrap(~All_variables, scales = 'free')
independent_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### transforming data log1p independent

independent_log1phist <- df_tidy%>%
  filter(All_variables!="pm25")%>%
  ggplot(., aes(x=log1p(val))) + 
  geom_histogram(aes(x = log1p(val), y = ..density..),color="black", fill="white")+
  geom_density(colour="red", size=1, adjust=1)+
  facet_wrap(~All_variables, scales = 'free')
independent_log1phist
## Warning in log1p(val): NaNs produced
## Warning in log1p(val): NaNs produced

## Warning in log1p(val): NaNs produced
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 125 rows containing non-finite values (stat_bin).
## Warning: Removed 125 rows containing non-finite values (stat_density).

density_loghist <- df_tidy%>%
  filter(All_variables=="density_km2")%>%
  ggplot(., aes(x=log(val))) + 
  geom_histogram(aes(x = log(val), y = ..density..),color="black", fill="white")+
  geom_density(colour="red", size=1, adjust=1)+
  facet_wrap(~All_variables, scales = 'free')
density_loghist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### plotting histograms
supp.labs <- c("Log1p(Flats per 1000 residents)", "Forrest Area","PM2.5","Log1p(Roads Density)","Log1p(Altitude)")
names(supp.labs) <- c("flats", "forrest","pm25","roads_density_km2","altitude_m_masl")

Distribution_hist <-df %>% 
  st_drop_geometry() %>% 
  dplyr::select(is.numeric,-area_code)%>%
  mutate(roads_density_km2=log1p(roads_density_km2),
         altitude_m_masl=log1p(altitude_m_masl),
         flats=log1p(flats)) %>% 
  pivot_longer(everything(),
               names_to="All_variables", 
               values_to="val")%>%
  mutate(All_variables = tolower(All_variables))%>%
  filter(All_variables=="pm25"|
           All_variables=="roads_density_km2"|
           All_variables=="altitude_m_masl"|
           All_variables=="flats"|
           All_variables=="forrest")%>%
  ggplot(., aes(x=val)) + 
  geom_histogram(aes(x = val, y = ..density..),color="black", fill="white")+
  geom_density(colour="red", size=1, adjust=1)+
  labs(x = NULL, y = NULL)+
  facet_wrap(~All_variables,ncol=1,scales="free",dir="v" ,labeller =labeller(All_variables=supp.labs))+
  theme_minimal()
Distribution_hist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### boxplots
pm25_box <- df_tidy%>%
  filter(All_variables=="pm25")%>%
  ggplot(., aes(y=val)) + 
  geom_boxplot()
pm25_box

log_furnaces_box <- df_tidy%>%
  filter(All_variables=="furnaces_per_1000")%>%
  ggplot(., aes(y=log1p(val))) + 
  geom_boxplot()
log_furnaces_box

### mapping pm25 and furnaces
tmap_mode("plot")
## tmap mode set to plotting
breaks_pm25 <- c(28,30,33.5,34,34.5,35,38)
pm25_08_18_map <- tm_shape(df, bbbox = ls_bb) + 
  tm_polygons(col='pm25_08_18',breaks=breaks_pm25,#style="jenks",
              palette="YlOrRd",alpha=1,title=expression('PM'[2.5]* '[μg/m'^3*']'))+
  tm_layout(title = "Period 2008-2018",
            title.position = c('right','top'),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 1,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(b)", position=c(0,0.85), size=1)

pm25_02_08_map <- tm_shape(df, bbbox = ls_bb) + 
  tm_polygons(col='pm25_02_08',breaks=breaks_pm25,#style="fisher",
              palette="YlOrRd",alpha=1,title=expression('PM'[2.5]* '[μg/m'^3*']'))+
  tm_layout(title = "Period 2002-2007",
            title.position = c('right','top'),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 1,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(a)", position=c(0,0.85), size=1)

pm25_dif_map <- tm_shape(df, bbbox = ls_bb) + 
  tm_polygons(col='pm25_dif',style="fisher",
              palette="seq",alpha=1,title=expression('Change in PM'[2.5]))+
  tm_layout(title = "Change (b)-(a)",
            title.position = c('right','top'),
            aes.palette = list(seq = "-Spectral"),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.75,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(c)", position=c(0,0.85), size=1)


furnaces_map <- tm_shape(df, bbbox = ls_bb) + 
  tm_polygons(col='furnaces_per_1000',style="fisher",
              palette="YlGnBu",alpha=1,title='Removed furnaces\nper 1000 residents')+
  tm_layout(title = "Period 2008-2018",
            title.position = c('right','top'),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.6,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(d)", position=c(0,0.85), size=1)

t<-tmap_arrange(pm25_02_08_map,pm25_08_18_map,pm25_dif_map,furnaces_map, ncol=2)
t
## Variable(s) "pm25_dif" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#tmap_save(t, 'pm25_ls.png')

1.4 Spatial dependance

###calculating centroids for regions
coordsR <- df%>%
  st_centroid()%>%
  st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
###adujusting centroids to stay with in the polygons
coordsR_ad <- df%>%
  st_point_on_surface(.)%>%
  st_geometry()
## Warning in st_point_on_surface.sf(.): st_point_on_surface assumes attributes are
## constant over geometries of x
par(mfrow=c(1,1))

plot(coordsR,axes=TRUE,col="blue")
plot(coordsR_ad,axes=TRUE,col="red",add=TRUE)
plot(gminy_LP$geometry,alpha=0.5, add=TRUE)

## Contiguity

#create a neighbours list

gminy_nb <- df%>%
  poly2nb(., queen=T)

knn_gminy <-coordsR_ad %>%
  knearneigh(., k=6)

knn_4_gminy <-coordsR_ad %>%
  knearneigh(., k=4)

gminy_knn <- knn_gminy %>%
  knn2nb()

gminy_knn_4 <- knn_4_gminy %>%
  knn2nb()

#plot them
plot(gminy_nb, st_geometry(coordsR_ad), col="red")
plot(gminy_LP$geometry, add=T)

plot(gminy_knn, st_geometry(coordsR_ad), col="blue")
plot(gminy_LP$geometry, add=T)

#create a spatial weights object from these weights
Gminy.queens_weight<- gminy_nb %>%
  nb2listw(., style="C")

Gminy.knn_6_weight<- gminy_knn %>%
  nb2listw(., style="C")

Gminy.knn_4_weight<- gminy_knn_4 %>%
  nb2listw(., style="C")


##Testing
#Moran’s I test
I_LsGminy_Global_PM25 <- df%>%
  pull(pm25) %>%
  as.vector()%>%
  moran.test(., Gminy.queens_weight)

I_LsGminy_Global_PM25
## 
##  Moran I test under randomisation
## 
## data:  .  
## weights: Gminy.queens_weight    
## 
## Moran I statistic standard deviate = 20.233, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.890584834      -0.005524862       0.001961541
#Geary’s C

C_LsGminy_Global_PM25 <- df%>%
  pull(pm25) %>%
  as.vector()%>%
  geary.test(., Gminy.queens_weight)

C_LsGminy_Global_PM25
## 
##  Geary C test under randomisation
## 
## data:  . 
## weights: Gminy.queens_weight 
## 
## Geary C statistic standard deviate = 15.352, p-value < 2.2e-16
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##       0.120567100       1.000000000       0.003281688
###Localised statistics
breaks1<-c(-1000,-2.58,-1.96,-1.65,1.65,1.96,2.58,1000)
##Local Moran’s I

I_LsGminy_Global_PM25 <- df %>%
  pull(pm25) %>%
  as.vector()%>%
  localmoran(., Gminy.queens_weight)%>%
  as_tibble()


df<- df%>%
  mutate(pm25_pvalue =as.numeric(I_LsGminy_Global_PM25$`Pr(z > 0)`))%>%
  mutate(pm25_I =as.numeric(I_LsGminy_Global_PM25$Ii))%>%
  mutate(pm25_Iz =as.numeric(I_LsGminy_Global_PM25$Z.Ii))

#mapping Local Moran’s I

local_m<-tm_shape(df,bbbox = ls_bb) +
  tm_polygons(col="pm25_Iz",
              style="fixed",
              breaks=breaks1,
              palette="RdGy",
              midpoint=0,
              title="Z-score")+
  tm_layout(title = "Local Moran's I",
            title.position = c('right','top'),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.75,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_shape(df %>% filter(pm25_pvalue < 0.05))+
  tm_borders(lwd=1.5,col = "black")+
  tm_add_legend('line',col="black",size = 0.5,
                labels = "P-value < 0.05")+
  tm_credits("(a)", position=c(0,0.85), size=1)


## Gi* statistic

G_LsGminy_Local_PM25 <- df%>%
  dplyr::pull(pm25) %>%
  as.vector()%>%
  localG(.,Gminy.queens_weight)

df<- df%>%
  dplyr::mutate(pm25_LocGiz = as.numeric(G_LsGminy_Local_PM25))


local_g<-tm_shape(df,bbbox = ls_bb) +
  tm_polygons("pm25_LocGiz",
              style="fixed",
              breaks=breaks1,
              palette="RdBu",
              midpoint=NA,
              title="Z-score")+
  tm_layout(title = "Getis-Ord Gi*",
            title.position = c('right','top'),
            title.size = 1,
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.75,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(b)", position=c(0,0.85), size=1)

#save plot
t_loc<- tmap_arrange(local_m,local_g,ncol=2)
t_loc

#tmap_save(t_loc, 'local.png', height =3.8, width = 7,unit="in",
#          dpi = 600)

1.5 OLS regressions

### model1
model1 <- lm(pm25 ~ log1p(furnaces_per_1000) ,
             data = df)
tidy(model1)
## # A tibble: 2 x 5
##   term                     estimate std.error statistic   p.value
##   <chr>                       <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)                31.4       0.351     89.3  5.16e-151
## 2 log1p(furnaces_per_1000)    0.629     0.169      3.72 2.66e-  4
summary(model1)
## 
## Call:
## lm(formula = pm25 ~ log1p(furnaces_per_1000), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8361 -1.1198  0.1572  1.2030  4.8355 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               31.3564     0.3511   89.32  < 2e-16 ***
## log1p(furnaces_per_1000)   0.6289     0.1690    3.72 0.000266 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.923 on 180 degrees of freedom
## Multiple R-squared:  0.0714, Adjusted R-squared:  0.06624 
## F-statistic: 13.84 on 1 and 180 DF,  p-value: 0.0002657
### model2
model2 <- lm(pm25 ~ log1p(furnaces_per_1000)+density_km2+log1p(roads_density_km2)+
               log1p(altitude_m_masl)+log1p(urban_landuse)+log(total_area_m2) ,
             data = df)
tidy(model2)
## # A tibble: 7 x 5
##   term                      estimate std.error statistic    p.value
##   <chr>                        <dbl>     <dbl>     <dbl>      <dbl>
## 1 (Intercept)              26.0       5.40         4.81  0.00000324
## 2 log1p(furnaces_per_1000)  0.476     0.170        2.79  0.00582   
## 3 density_km2              -0.000901  0.000773    -1.17  0.245     
## 4 log1p(roads_density_km2)  2.16      0.712        3.04  0.00273   
## 5 log1p(altitude_m_masl)    1.11      0.317        3.49  0.000603  
## 6 log1p(urban_landuse)      0.0478    0.171        0.279 0.781     
## 7 log(total_area_m2)       -0.242     0.272       -0.891 0.374
summary(model2)
## 
## Call:
## lm(formula = pm25 ~ log1p(furnaces_per_1000) + density_km2 + 
##     log1p(roads_density_km2) + log1p(altitude_m_masl) + log1p(urban_landuse) + 
##     log(total_area_m2), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7352 -1.0631  0.0483  1.3118  4.1111 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              25.9501527  5.3952829   4.810 3.24e-06 ***
## log1p(furnaces_per_1000)  0.4755645  0.1703367   2.792 0.005822 ** 
## density_km2              -0.0009013  0.0007730  -1.166 0.245237    
## log1p(roads_density_km2)  2.1641310  0.7118235   3.040 0.002727 ** 
## log1p(altitude_m_masl)    1.1066947  0.3167331   3.494 0.000603 ***
## log1p(urban_landuse)      0.0478068  0.1714318   0.279 0.780676    
## log(total_area_m2)       -0.2424012  0.2720007  -0.891 0.374057    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.825 on 175 degrees of freedom
## Multiple R-squared:  0.187,  Adjusted R-squared:  0.1592 
## F-statistic:  6.71 on 6 and 175 DF,  p-value: 2.068e-06
### model 2.2
model2.2 <- lm(pm25 ~ log1p(furnaces_per_1000)+log1p(roads_density_km2)+
               log1p(altitude_m_masl) + forrest +flats + budget,
             data = df)
tidy(model2.2)
## # A tibble: 7 x 5
##   term                      estimate std.error statistic  p.value
##   <chr>                        <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)              20.5       2.36         8.69  2.55e-15
## 2 log1p(furnaces_per_1000)  0.483     0.150        3.22  1.55e- 3
## 3 log1p(roads_density_km2)  0.641     0.438        1.46  1.46e- 1
## 4 log1p(altitude_m_masl)    1.51      0.394        3.82  1.85e- 4
## 5 forrest                  -0.0224    0.00984     -2.27  2.42e- 2
## 6 flats                     0.420     0.0731       5.74  4.08e- 8
## 7 budget                    0.000114  0.000254     0.451 6.53e- 1
summary(model2.2)
## 
## Call:
## lm(formula = pm25 ~ log1p(furnaces_per_1000) + log1p(roads_density_km2) + 
##     log1p(altitude_m_masl) + forrest + flats + budget, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7654 -0.8273  0.1572  1.1628  3.1486 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              20.5338117  2.3632821   8.689 2.55e-15 ***
## log1p(furnaces_per_1000)  0.4831270  0.1502073   3.216 0.001546 ** 
## log1p(roads_density_km2)  0.6406090  0.4383139   1.462 0.145663    
## log1p(altitude_m_masl)    1.5064306  0.3943865   3.820 0.000185 ***
## forrest                  -0.0223729  0.0098368  -2.274 0.024156 *  
## flats                     0.4198855  0.0731412   5.741 4.08e-08 ***
## budget                    0.0001144  0.0002539   0.451 0.652790    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.64 on 175 degrees of freedom
## Multiple R-squared:  0.3432, Adjusted R-squared:  0.3207 
## F-statistic: 15.24 on 6 and 175 DF,  p-value: 5.149e-14
### model3
model3 <- lm(pm25 ~ log1p(roads_density_km2)+
               log1p(altitude_m_masl) +log1p(flats)+ forrest,
             data = df)
tidy(model3)
## # A tibble: 5 x 5
##   term                     estimate std.error statistic  p.value
##   <chr>                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)               19.4       2.37        8.21 4.58e-14
## 2 log1p(roads_density_km2)   1.11      0.434       2.56 1.13e- 2
## 3 log1p(altitude_m_masl)     1.58      0.407       3.89 1.40e- 4
## 4 log1p(flats)               1.92      0.369       5.21 5.16e- 7
## 5 forrest                   -0.0268    0.0101     -2.65 8.73e- 3
summary(model3)
## 
## Call:
## lm(formula = pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + 
##     log1p(flats) + forrest, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9285 -0.9859  0.2819  1.2018  2.9072 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              19.44059    2.36924   8.205 4.58e-14 ***
## log1p(roads_density_km2)  1.11035    0.43370   2.560  0.01130 *  
## log1p(altitude_m_masl)    1.58412    0.40683   3.894  0.00014 ***
## log1p(flats)              1.92446    0.36922   5.212 5.16e-07 ***
## forrest                  -0.02681    0.01011  -2.652  0.00873 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.701 on 177 degrees of freedom
## Multiple R-squared:  0.2857, Adjusted R-squared:  0.2695 
## F-statistic:  17.7 on 4 and 177 DF,  p-value: 3.081e-12

1.6 OLS Assumptions

#### residuals distribution analysis analysis 

#save the residuals into your dataframe

model_data <- model3 %>%
  augment(., df)

#plot residuals
model_data%>%
  dplyr::select(.resid)%>%
  pull()%>%
  qplot()+ 
  geom_histogram() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# adding residuals shapelayer
df <- df %>%
  mutate(model3resids = residuals(model3))

#### No Multicolinearity in the independent variables  

Correlation <- df %>%
  st_drop_geometry()%>%
  dplyr::select(roads_density_km2,
                altitude_m_masl,
                forrest,
                flats) %>%
  corrr::correlate()
## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
#visualise the correlation matrix
corrr::rplot(Correlation)
## Don't know how to automatically pick scale for object of type noquote. Defaulting to continuous.

#VIF
vif(model3)
## log1p(roads_density_km2)   log1p(altitude_m_masl)             log1p(flats) 
##                 1.234646                 2.025861                 1.177673 
##                  forrest 
##                 1.999470
#### Homoscedasticity

#print some model diagnositcs. 
par(mfrow=c(2,2))    #plot to 2 by 2 array
plot(model3)

#### Independence of Errors

#now plot the residuals
tmap_mode("plot")
## tmap mode set to plotting
OLS_res_map<- tm_shape(df) +
  tm_polygons("model3resids",
              palette = "RdYlBu",style="fisher",alpha=1,title=expression('Residuals of PM'[2.5]))+
  tm_layout(legend.position = c(0,0), 
           legend.text.size=0.8, 
           legend.title.size = 1,
           legend.title.fontface = 1,
           legend.height = 0.4,
           frame=FALSE)+
  tm_credits("(a)", position=c(0,0.85), size=1.5)+
  tm_credits("(b)", position=c(0.95,0.85), size=1.5)
OLS_res_map 
## Variable(s) "model3resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#tmap_save(OLS_res_map, 'OLS_res_map.png')

#running Moran's I on residuals
Queen <- df %>%
  st_drop_geometry()%>%
  dplyr::select(model3resids)%>%
  pull(model3resids)%>%
  as.vector() %>% 
  moran.test(., Gminy.queens_weight)%>%
  tidy()

Queen
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic  p.value method            alternative
##       <dbl>     <dbl>     <dbl>     <dbl>    <dbl> <chr>             <chr>      
## 1     0.786  -0.00552   0.00196      17.9 9.29e-72 Moran I test und… greater
Nearest_4_neighbour <- df %>%
  st_drop_geometry()%>%
  dplyr::select(model3resids)%>%
  pull(model3resids)%>%
  as.vector() %>% 
  moran.test(., Gminy.knn_4_weight)%>%
  tidy()
Nearest_4_neighbour
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic  p.value method            alternative
##       <dbl>     <dbl>     <dbl>     <dbl>    <dbl> <chr>             <chr>      
## 1     0.833  -0.00552   0.00244      17.0 4.87e-65 Moran I test und… greater
#Geary’s C

C_queen <- df%>%
  pull(model3resids) %>%
  as.vector()%>%
  geary.test(., Gminy.queens_weight)

C_queen
## 
##  Geary C test under randomisation
## 
## data:  . 
## weights: Gminy.queens_weight 
## 
## Geary C statistic standard deviate = 13.889, p-value < 2.2e-16
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##       0.194331724       1.000000000       0.003365095
C_Nearest_4_neighbour <- df%>%
  pull(model3resids) %>%
  as.vector()%>%
  geary.test(., Gminy.knn_4_weight)

C_Nearest_4_neighbour
## 
##  Geary C test under randomisation
## 
## data:  . 
## weights: Gminy.knn_4_weight 
## 
## Geary C statistic standard deviate = 15.676, p-value < 2.2e-16
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##       0.182841880       1.000000000       0.002717328

1.7 Spatially-lagged regression

library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
##     as_dsTMatrix_listw, as.spam.listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
pm25_model3_queen <- lagsarlm(pm25 ~ log1p(roads_density_km2)+
                                log1p(altitude_m_masl) + forrest +log1p(flats),
                              data = df, 
                              nb2listw(gminy_nb, style="C"), 
                              method = "eigen")

#what do the outputs show?
tidy(pm25_model3_queen)
## # A tibble: 6 x 5
##   term                     estimate std.error statistic  p.value
##   <chr>                       <dbl>     <dbl>     <dbl>    <dbl>
## 1 rho                        0.0407   0.0103       3.95 7.96e- 5
## 2 (Intercept)               18.4      2.26         8.17 2.22e-16
## 3 log1p(roads_density_km2)   1.12     0.410        2.72 6.48e- 3
## 4 log1p(altitude_m_masl)     1.60     0.384        4.15 3.28e- 5
## 5 forrest                   -0.0260   0.00955     -2.73 6.41e- 3
## 6 log1p(flats)               1.61     0.357        4.50 6.77e- 6
glance(pm25_model3_queen)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.345  703.  726.     470.  -345.   182
summary(pm25_model3_queen)
## 
## Call:
## lagsarlm(formula = pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + 
##     forrest + log1p(flats), data = df, listw = nb2listw(gminy_nb, 
##     style = "C"), method = "eigen")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5.18702 -1.09838  0.22661  1.30729  2.94210 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                            Estimate Std. Error z value  Pr(>|z|)
## (Intercept)              18.4352484  2.2566984  8.1691 2.220e-16
## log1p(roads_density_km2)  1.1154863  0.4097261  2.7225  0.006479
## log1p(altitude_m_masl)    1.5961712  0.3843557  4.1528 3.284e-05
## forrest                  -0.0260349  0.0095498 -2.7262  0.006407
## log1p(flats)              1.6070245  0.3570563  4.5008 6.771e-06
## 
## Rho: 0.040702, LR test value: 15.616, p-value: 7.759e-05
## Asymptotic standard error: 0.010316
##     z-value: 3.9456, p-value: 7.9606e-05
## Wald statistic: 15.568, p-value: 7.9606e-05
## 
## Log likelihood: -344.5768 for lag model
## ML residual variance (sigma squared): 2.5815, (sigma: 1.6067)
## Number of observations: 182 
## Number of parameters estimated: 7 
## AIC: 703.15, (AIC for lm: 716.77)
## LM test for residual autocorrelation
## test value: 254.43, p-value: < 2.22e-16
#run a spatially-lagged regression model
pm25_model3_knn4 <- lagsarlm(pm25 ~ log1p(roads_density_km2)+
                               log1p(altitude_m_masl) + forrest +log1p(flats),
                             data = df, 
                             nb2listw(gminy_knn, style="C"), 
                             method = "eigen")

#outputs 
tidy(pm25_model3_knn4)
## # A tibble: 6 x 5
##   term                     estimate std.error statistic     p.value
##   <chr>                       <dbl>     <dbl>     <dbl>       <dbl>
## 1 rho                       0.973     0.0106     91.8   0          
## 2 (Intercept)              -0.263     0.692      -0.380 0.704      
## 3 log1p(roads_density_km2)  0.206     0.109       1.90  0.0576     
## 4 log1p(altitude_m_masl)    0.0388    0.102       0.380 0.704      
## 5 forrest                  -0.00527   0.00255    -2.06  0.0391     
## 6 log1p(flats)              0.473     0.0943      5.01  0.000000539
glance(pm25_model3_knn4)
## # A tibble: 1 x 6
##   r.squared   AIC   BIC deviance logLik  nobs
##       <dbl> <dbl> <dbl>    <dbl>  <dbl> <int>
## 1     0.956  275.  297.     33.0  -131.   182
summary(pm25_model3_knn4)
## 
## Call:
## lagsarlm(formula = pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + 
##     forrest + log1p(flats), data = df, listw = nb2listw(gminy_knn, 
##     style = "C"), method = "eigen")
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.3568620 -0.1957130  0.0066692  0.1875506  1.7309113 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)              -0.2628565  0.6923558 -0.3797  0.70420
## log1p(roads_density_km2)  0.2063647  0.1086972  1.8985  0.05763
## log1p(altitude_m_masl)    0.0388211  0.1021246  0.3801  0.70385
## forrest                  -0.0052670  0.0025533 -2.0628  0.03913
## log1p(flats)              0.4727066  0.0943173  5.0119 5.39e-07
## 
## Rho: 0.97336, LR test value: 443.72, p-value: < 2.22e-16
## Asymptotic standard error: 0.010609
##     z-value: 91.752, p-value: < 2.22e-16
## Wald statistic: 8418.5, p-value: < 2.22e-16
## 
## Log likelihood: -130.5244 for lag model
## ML residual variance (sigma squared): 0.18159, (sigma: 0.42613)
## Number of observations: 182 
## Number of parameters estimated: 7 
## AIC: 275.05, (AIC for lm: 716.77)
## LM test for residual autocorrelation
## test value: 24.272, p-value: 8.3665e-07

1.8 GWR

dfSP <- df %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition,
##  but +towgs84= values preserved
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO"): Discarded
## datum European_Terrestrial_Reference_System_1989 in CRS definition
coordsR_adSP <- coordsR_ad %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition,
##  but +towgs84= values preserved

## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum European_Terrestrial_Reference_System_1989 in CRS definition
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(pm25 ~ log1p(roads_density_km2)+
                          log1p(altitude_m_masl) + forrest +log1p(flats), 
                        data = dfSP, 
                        coords=coordsR_adSP,
                        adapt=T)
## Warning in gwr.sel(pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + :
## data is Spatial* object, ignoring coords argument
## Adaptive q: 0.381966 CV score: 369.6079 
## Adaptive q: 0.618034 CV score: 431.2713 
## Adaptive q: 0.236068 CV score: 309.0268 
## Adaptive q: 0.145898 CV score: 254.2007 
## Adaptive q: 0.09016994 CV score: 205.0326 
## Adaptive q: 0.05572809 CV score: 174.9169 
## Adaptive q: 0.03444185 CV score: 136.831 
## Adaptive q: 0.02128624 CV score: 97.1045 
## Adaptive q: 0.01315562 CV score: 93.06661 
## Adaptive q: 0.01512602 CV score: 93.54901 
## Adaptive q: 0.0111461 CV score: 93.29478 
## Adaptive q: 0.01278137 CV score: 93.04483 
## Adaptive q: 0.01269148 CV score: 93.04326 
## Adaptive q: 0.01263721 CV score: 93.04304 
## Adaptive q: 0.01259652 CV score: 93.04323 
## Adaptive q: 0.01263721 CV score: 93.04304
#run the gwr model
gwr.model = gwr(pm25 ~ log1p(roads_density_km2)+
                  log1p(altitude_m_masl) + forrest +log1p(flats),
                data = dfSP, 
                coords=coordsR_adSP , 
                adapt=GWRbandwidth, 
                hatmatrix=TRUE, 
                se.fit=TRUE)
## Warning in gwr(pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + : data
## is Spatial* object, ignoring coords argument
## Warning in proj4string(data): CRS object has comment, which is lost in output
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition,
##  but +towgs84= values preserved
#print the results of the model
gwr.model
## Call:
## gwr(formula = pm25 ~ log1p(roads_density_km2) + log1p(altitude_m_masl) + 
##     forrest + log1p(flats), data = dfSP, coords = coordsR_adSP, 
##     adapt = GWRbandwidth, hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.01263721 (about 2 of 182 data points)
## Summary of GWR coefficient estimates at data points:
##                                Min.    1st Qu.     Median    3rd Qu.       Max.
## X.Intercept.             -32.800643  20.502409  28.641659  40.242702  73.141354
## log1p.roads_density_km2.  -2.276441  -0.347733   0.031686   0.688708   3.599593
## log1p.altitude_m_masl.    -6.597649  -1.293346   0.572688   1.956441  12.343034
## forrest                   -0.117570  -0.027760  -0.011126   0.011965   0.055326
## log1p.flats.              -1.512878  -0.297254   0.389098   1.486495   3.579780
##                           Global
## X.Intercept.             19.4406
## log1p.roads_density_km2.  1.1104
## log1p.altitude_m_masl.    1.5841
## forrest                  -0.0268
## log1p.flats.              1.9245
## Number of data points: 182 
## Effective number of parameters (residual: 2traceS - traceS'S): 113.7555 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 68.24447 
## Sigma (residual: 2traceS - traceS'S): 0.5662372 
## Effective number of parameters (model: traceS): 89.49725 
## Effective degrees of freedom (model: traceS): 92.50275 
## Sigma (model: traceS): 0.4863566 
## Sigma (ML): 0.346734 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 494.9237 
## AIC (GWR p. 96, eq. 4.22): 220.443 
## Residual sum of squares: 21.88085 
## Quasi-global R2: 0.9694783
results <- as.data.frame(gwr.model$SDF)
names(results)
##  [1] "sum.w"                           "X.Intercept."                   
##  [3] "log1p.roads_density_km2."        "log1p.altitude_m_masl."         
##  [5] "forrest"                         "log1p.flats."                   
##  [7] "X.Intercept._se"                 "log1p.roads_density_km2._se"    
##  [9] "log1p.altitude_m_masl._se"       "forrest_se"                     
## [11] "log1p.flats._se"                 "gwr.e"                          
## [13] "pred"                            "pred.se"                        
## [15] "localR2"                         "X.Intercept._se_EDF"            
## [17] "log1p.roads_density_km2._se_EDF" "log1p.altitude_m_masl._se_EDF"  
## [19] "forrest_se_EDF"                  "log1p.flats._se_EDF"            
## [21] "pred.se.1"
#attach coefficients to original SF


df <- df%>%
  mutate(coefroads = results$log1p.roads_density_km2.) %>% 
  mutate(coefflats = results$log1p.flats.) %>% 
  mutate(coefforrest = results$forrest) %>% 
  mutate(coefaltitude = results$log1p.altitude_m_masl.)

#plotting gwr coef
tmap_mode("plot")
## tmap mode set to plotting
roads<- tm_shape(df ) +
  tm_polygons(col = "coefroads", 
              palette = "seq", #style="fisher",
              alpha = 1,title='Coefficient')+
tm_layout(title = "Roads Density",
          title.position = c('right','top'),
          title.size = 1,
          aes.palette = list(seq = "-RdYlGn"),
          legend.position = c(0,0), 
          legend.text.size=0.6, 
          legend.title.size = 0.6,
          legend.title.fontface = 1,
          legend.height = 0.4,
          frame=FALSE)+
  tm_credits("(a)", position=c(0,0.85), size=1)

flats<- tm_shape(df ) +
  tm_polygons(col = "coefflats", 
              palette = "seq", #style="fisher",
              alpha = 1,title='Coefficient')+
  tm_layout(title = "Flats Density",
            title.position = c('right','top'),
            title.size = 1,
            aes.palette = list(seq = "-RdYlGn"),
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.6,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(b)", position=c(0,0.85), size=1)

altitude<- tm_shape(df ) +
  tm_polygons(col = "coefaltitude", 
              palette = "seq", #style="fisher",
              alpha = 1,title='Coefficient')+
  tm_layout(title = "Altitude",
            title.position = c('right','top'),
            title.size = 1,
            aes.palette = list(seq = "-RdYlGn"),
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.6,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(c)", position=c(0,0.85), size=1)

forrest<- tm_shape(df ) +
  tm_polygons(col = "coefforrest", 
              palette = "seq", #style="fisher",
              alpha = 1,title='Coefficient')+
  tm_layout(title = "Forest Area",
            title.position = c('right','top'),
            title.size = 1,
            aes.palette = list(seq = "-RdYlGn"),
            legend.position = c(0,0), 
            legend.text.size=0.6, 
            legend.title.size = 0.6,
            legend.title.fontface = 1,
            legend.height = 0.4,
            frame=FALSE)+
  tm_credits("(d)", position=c(0,0.85), size=1)

t_gwr<-tmap_arrange(roads,flats,altitude,forrest,ncol=2)  
#tmap_save(t_gwr, 'gwr_coef.png')
t_gwr
## Variable(s) "coefroads" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "coefflats" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "coefaltitude" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Variable(s) "coefforrest" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

df <- df %>%
  mutate(gwr.modelresids =gwr.model$lm$residuals)

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(df) +
  tm_polygons("gwr.modelresids",
              palette = "RdYlBu") 
## Variable(s) "gwr.modelresids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#moran's I on residulas of gwr
Queen <- df %>%
  st_drop_geometry()%>%
  dplyr::select(gwr.modelresids)%>%
  pull()%>%
  moran.test(., Gminy.queens_weight)%>%
  tidy()

Queen
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic  p.value method            alternative
##       <dbl>     <dbl>     <dbl>     <dbl>    <dbl> <chr>             <chr>      
## 1     0.786  -0.00552   0.00196      17.9 9.29e-72 Moran I test und… greater
Nearest_neighbour <- df %>%
  st_drop_geometry()%>%
  dplyr::select(gwr.modelresids)%>%
  pull()%>%
  moran.test(., Gminy.knn_4_weight)%>%
  tidy()

Nearest_neighbour
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic  p.value method            alternative
##       <dbl>     <dbl>     <dbl>     <dbl>    <dbl> <chr>             <chr>      
## 1     0.833  -0.00552   0.00244      17.0 4.87e-65 Moran I test und… greater